# Replication Archive for: Aggarwal, Minali, Jennifer Allen, Alex Coppock, 
# Dan Frankowski, Sol Messing, Kelly Zhang, James Barnes, Andrew Beasley, 
# Harry Hantman, and Sylvan Zheng: The impact of digital campaign advertising 
# during the 2020 US presidential election: evidence from survey experiments, 
# field experiments, and a campaign-level experiment. 
# Nature Human Behaviour, forthcoming.

rm(list = ls())
library(tidyverse)
library(estimatr)
library(xtable)
source('helper_file.R')

# Read the aggregated analysis file, then expand it to one row per unit:
# uncount() repeats every row according to its value in column `n`.
aggregated_set <- read_rds("aggregated_analysis_set.rds")
main_analysis_set <-
  aggregated_set %>%
  uncount(weights = n)

# Model specifications. The outcome is voted_in_2020 in all three.

# Specification 1: treatment indicator only (plotted as "Unadjusted").
formula_1 <- voted_in_2020 ~ condition

# Specification 2: adds num_times_voted, tss_100 and pts_100
# (plotted as "PAP adjustment set").
formula_2 <- voted_in_2020 ~ condition + num_times_voted + tss_100 + pts_100

# Specification 3: adds one turnout indicator per even year 2000-2018,
# strata, and the party indicators (plotted as "Full adjustment set").
formula_3 <-
  voted_in_2020 ~ condition + pts_100 + tss_100 +
  voted_in_2000 + voted_in_2002 + voted_in_2004 + voted_in_2006 +
  voted_in_2008 + voted_in_2010 + voted_in_2012 + voted_in_2014 +
  voted_in_2016 + voted_in_2018 +
  strata + party_dem + party_rep + party_unknown

# Full-sample estimates: one ipw-weighted lm_robust fit per specification,
# each reduced to its tidy coefficient table.
fit_ate_1 <- tidy(
  lm_robust(formula = formula_1, data = main_analysis_set, weights = ipw)
)

fit_ate_2 <- tidy(
  lm_robust(formula = formula_2, data = main_analysis_set, weights = ipw)
)

fit_ate_3 <- tidy(
  lm_robust(formula = formula_3, data = main_analysis_set, weights = ipw)
)


# Conditional estimates by tss_bucket (shown as "Trump support" in the
# figure): each specification is re-fit separately within every level of
# tss_bucket, weighted by ipw, and the tidy coefficient tables are stacked
# with tss_bucket as a leading key column.
#
# group_modify() is used in place of the superseded dplyr::do(); the
# per-group data it passes omits the grouping column, which none of the
# formulas reference. ungroup() drops the leftover grouping so later steps
# receive a plain tibble.
tss_cates_1 <-
  main_analysis_set %>%
  group_by(tss_bucket) %>%
  group_modify(
    \(slice, key) tidy(lm_robust(formula = formula_1, weights = ipw, data = slice))
  ) %>%
  ungroup()

tss_cates_2 <-
  main_analysis_set %>%
  group_by(tss_bucket) %>%
  group_modify(
    \(slice, key) tidy(lm_robust(formula = formula_2, weights = ipw, data = slice))
  ) %>%
  ungroup()

tss_cates_3 <-
  main_analysis_set %>%
  group_by(tss_bucket) %>%
  group_modify(
    \(slice, key) tidy(lm_robust(formula = formula_3, weights = ipw, data = slice))
  ) %>%
  ungroup()


# Conditional estimates by race: each specification is re-fit separately
# within every level of race, weighted by ipw, and the tidy coefficient
# tables are stacked with race as a leading key column.
#
# group_modify() is used in place of the superseded dplyr::do(); the
# per-group data it passes omits the grouping column, which none of the
# formulas reference. ungroup() drops the leftover grouping so later steps
# receive a plain tibble.
race_cates_1 <-
  main_analysis_set %>%
  group_by(race) %>%
  group_modify(
    \(slice, key) tidy(lm_robust(formula = formula_1, weights = ipw, data = slice))
  ) %>%
  ungroup()

race_cates_2 <-
  main_analysis_set %>%
  group_by(race) %>%
  group_modify(
    \(slice, key) tidy(lm_robust(formula = formula_2, weights = ipw, data = slice))
  ) %>%
  ungroup()

race_cates_3 <-
  main_analysis_set %>%
  group_by(race) %>%
  group_modify(
    \(slice, key) tidy(lm_robust(formula = formula_3, weights = ipw, data = slice))
  ) %>%
  ungroup()


# Conditional estimates by gender: each specification is re-fit separately
# within every level of gender, weighted by ipw, and the tidy coefficient
# tables are stacked with gender as a leading key column.
#
# group_modify() is used in place of the superseded dplyr::do(); the
# per-group data it passes omits the grouping column, which none of the
# formulas reference. ungroup() drops the leftover grouping so later steps
# receive a plain tibble.
gender_cates_1 <-
  main_analysis_set %>%
  group_by(gender) %>%
  group_modify(
    \(slice, key) tidy(lm_robust(formula = formula_1, weights = ipw, data = slice))
  ) %>%
  ungroup()

gender_cates_2 <-
  main_analysis_set %>%
  group_by(gender) %>%
  group_modify(
    \(slice, key) tidy(lm_robust(formula = formula_2, weights = ipw, data = slice))
  ) %>%
  ungroup()

gender_cates_3 <-
  main_analysis_set %>%
  group_by(gender) %>%
  group_modify(
    \(slice, key) tidy(lm_robust(formula = formula_3, weights = ipw, data = slice))
  ) %>%
  ungroup()


# Conditional estimates by agecat (shown as "Age" in the figure): each
# specification is re-fit separately within every level of agecat, weighted
# by ipw, and the tidy coefficient tables are stacked with agecat as a
# leading key column.
#
# group_modify() is used in place of the superseded dplyr::do(); the
# per-group data it passes omits the grouping column, which none of the
# formulas reference. ungroup() drops the leftover grouping so later steps
# receive a plain tibble.
age_cates_1 <-
  main_analysis_set %>%
  group_by(agecat) %>%
  group_modify(
    \(slice, key) tidy(lm_robust(formula = formula_1, weights = ipw, data = slice))
  ) %>%
  ungroup()

age_cates_2 <-
  main_analysis_set %>%
  group_by(agecat) %>%
  group_modify(
    \(slice, key) tidy(lm_robust(formula = formula_2, weights = ipw, data = slice))
  ) %>%
  ungroup()

age_cates_3 <-
  main_analysis_set %>%
  group_by(agecat) %>%
  group_modify(
    \(slice, key) tidy(lm_robust(formula = formula_3, weights = ipw, data = slice))
  ) %>%
  ungroup()


# Conditional estimates by close_margins_2016 (shown as "Margin" in the
# figure): each specification is re-fit separately within every level of
# close_margins_2016, weighted by ipw, and the tidy coefficient tables are
# stacked with close_margins_2016 as a leading key column.
#
# group_modify() is used in place of the superseded dplyr::do(); the
# per-group data it passes omits the grouping column, which none of the
# formulas reference. ungroup() drops the leftover grouping so later steps
# receive a plain tibble.
close_margins_2016_cates_1 <-
  main_analysis_set %>%
  group_by(close_margins_2016) %>%
  group_modify(
    \(slice, key) tidy(lm_robust(formula = formula_1, weights = ipw, data = slice))
  ) %>%
  ungroup()

close_margins_2016_cates_2 <-
  main_analysis_set %>%
  group_by(close_margins_2016) %>%
  group_modify(
    \(slice, key) tidy(lm_robust(formula = formula_2, weights = ipw, data = slice))
  ) %>%
  ungroup()

close_margins_2016_cates_3 <-
  main_analysis_set %>%
  group_by(close_margins_2016) %>%
  group_modify(
    \(slice, key) tidy(lm_robust(formula = formula_3, weights = ipw, data = slice))
  ) %>%
  ungroup()


# Conditional estimates by party (shown as "Partisanship" in the figure):
# each specification is re-fit separately within every level of party,
# weighted by ipw, and the tidy coefficient tables are stacked with party as
# a leading key column.
#
# group_modify() is used in place of the superseded dplyr::do(); the
# per-group data it passes omits the grouping column, which none of the
# formulas reference. ungroup() drops the leftover grouping so later steps
# receive a plain tibble.
party_cates_1 <-
  main_analysis_set %>%
  group_by(party) %>%
  group_modify(
    \(slice, key) tidy(lm_robust(formula = formula_1, weights = ipw, data = slice))
  ) %>%
  ungroup()

party_cates_2 <-
  main_analysis_set %>%
  group_by(party) %>%
  group_modify(
    \(slice, key) tidy(lm_robust(formula = formula_2, weights = ipw, data = slice))
  ) %>%
  ungroup()

# Same covariates as the full specification minus party_dem, party_rep and
# party_unknown.
# NOTE(review): presumably dropped because these indicators do not vary
# within a single party subgroup -- confirm.
formula_3_party <-
  voted_in_2020 ~ condition + pts_100 + tss_100 +
  voted_in_2000 + voted_in_2002 + voted_in_2004 +
  voted_in_2006 + voted_in_2008 + voted_in_2010 +
  voted_in_2012 + voted_in_2014 + voted_in_2016 +
  voted_in_2018 + strata

party_cates_3 <-
  main_analysis_set %>%
  group_by(party) %>%
  group_modify(
    \(slice, key) tidy(lm_robust(formula = formula_3_party, weights = ipw, data = slice))
  ) %>%
  ungroup()


# For each specification, stack the full-sample table and the six subgroup
# tables into one data frame; the list names are written to the `covariate`
# column so every row records which analysis produced it.
estimates_df_1 <-
  list(
    "ATE" = fit_ate_1,
    "Gender" = gender_cates_1,
    "Race" = race_cates_1,
    "Age" = age_cates_1,
    "Margin" = close_margins_2016_cates_1,
    "Trump support" = tss_cates_1,
    "Partisanship" = party_cates_1
  ) %>%
  bind_rows(.id = "covariate")

estimates_df_2 <-
  list(
    "ATE" = fit_ate_2,
    "Gender" = gender_cates_2,
    "Race" = race_cates_2,
    "Age" = age_cates_2,
    "Margin" = close_margins_2016_cates_2,
    "Trump support" = tss_cates_2,
    "Partisanship" = party_cates_2
  ) %>%
  bind_rows(.id = "covariate")

estimates_df_3 <-
  list(
    "ATE" = fit_ate_3,
    "Gender" = gender_cates_3,
    "Race" = race_cates_3,
    "Age" = age_cates_3,
    "Margin" = close_margins_2016_cates_3,
    "Trump support" = tss_cates_3,
    "Partisanship" = party_cates_3
  ) %>%
  bind_rows(.id = "covariate")


# Plotting/table data: stack the three specifications (list names go to
# `estimator`), keep only the "conditiontreat" coefficient rows, and add
# display columns.
gg_df <-
  list(
    "Unadjusted" = estimates_df_1,
    "PAP adjustment set" = estimates_df_2,
    "Full adjustment set" = estimates_df_3
  ) %>%
  bind_rows(.id = "estimator") %>%
  filter(term == "conditiontreat") %>%
  mutate(
    # Each subgroup row has a value in exactly one of these columns; the
    # full-sample rows have none and are labelled "ATE".
    covariate_value =
      coalesce(gender, race, agecat, close_margins_2016, tss_bucket, party) %>%
      replace_na("ATE"),
    # Factor levels fix the top-to-bottom order of the facet rows.
    covariate = factor(
      covariate,
      levels = c("ATE", "Age", "Gender", "Race", "Margin", "Partisanship", "Trump support")
    ),
    # Text label built by make_se_entry() (defined in helper_file.R).
    entry = make_se_entry(estimate, std.error, digits = 3),
    # Factor levels fix the left-to-right order of the facet columns.
    estimator = factor(
      estimator,
      levels = c("Unadjusted", "PAP adjustment set", "Full adjustment set")
    )
  )


# Figure: point estimates with conf.low/conf.high ranges, one facet row per
# covariate and one facet column per estimator.
g <-
  ggplot(gg_df, aes(x = estimate, y = covariate_value)) +
  geom_point() +
  # Estimate/SE label, shifted 0.004 away from zero horizontally and nudged
  # upward so it does not sit on top of the point.
  geom_text(
    mapping = aes(x = estimate + 0.004 * sign(estimate), label = entry),
    color = gray(0.45),
    size = 2,
    nudge_y = 0.25
  ) +
  geom_linerange(mapping = aes(xmin = conf.low, xmax = conf.high)) +
  facet_grid(
    cols = vars(estimator),
    rows = vars(covariate),
    space = "free",
    scales = "free_y"
  ) +
  # Reference line at zero effect.
  geom_vline(
    xintercept = 0,
    linetype = "dotted",
    color = "red",
    alpha = 0.5
  ) +
  scale_x_continuous(breaks = c(-0.01, 0.01)) +
  # Zoom the x axis without dropping data outside the window.
  coord_cartesian(xlim = c(-0.02, 0.02)) +
  theme_minimal() +
  theme(
    panel.grid.minor.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  labs(x = "Average and Conditional Average Treatment Effect Estimates")

#ggsave("output/figure_1.pdf", g, width = 6.5, height = 6)

# Subgroup sizes reported in the figure caption: row counts of
# main_analysis_set within each level of every subgroup variable.

main_analysis_set |> group_by(tss_bucket) |> tally()
main_analysis_set |> group_by(race) |> tally()
main_analysis_set |> group_by(gender) |> tally()
main_analysis_set |> group_by(agecat) |> tally()
main_analysis_set |> group_by(close_margins_2016) |> tally()
main_analysis_set |> group_by(party) |> tally()

# Ns: Total: 1,999,282. Age: 18-39: 1,379,017; 40+: 620,265. Gender: Female: 978,041; Other: 1,021,241. Race: Black: 233,546; Latinx: 179,036; White: 1,531,129; Other: 55,571. Margin: less than 3pp: 1,337,057; more than 3pp: 662,225. Partisanship: Democrat: 182,945; Republican: 71,875; Unknown: 1,442,071; Other: 302,391. Trump support: 30 to 40: 522,918; 40 to 50: 485,371; 50 to 60: 478,333; 60 to 70: 512,660. 





# Table_F1 --------------------------------------------------

# Table F1: the plotted estimates as an xtable, sorted by covariate,
# covariate value and estimator.
table_f1 <-
  gg_df %>%
  arrange(covariate, covariate_value, estimator) %>%
  select(
    covariate, covariate_value, estimator,
    estimate, std.error, df, statistic, p.value, conf.low, conf.high
  ) %>%
  # digits: one entry for the row names followed by one per column --
  # 0 for the three label columns and df, 3 for the numeric estimates.
  xtable(digits = c(rep(0, 4), 3, 3, 0, rep(3, 4)))

# print.xtable(
#   table_f1,
#   include.colnames = FALSE,
#   include.rownames = FALSE,
#   only.contents = TRUE,
#   hline.after = c(),
#   file = "output/table_F1.tex"
# )






